import json
import os
from tqdm import tqdm
import pandas as pd
import re

path_dir = os.path.dirname(os.path.abspath(__file__))

# Captures the text between the "定义：" and "关键要素：" markers. DOTALL is
# required so the captured section may span several lines.
_DEFINE_PATTERN = re.compile('定义：(.+?)关键要素：', re.S)


def replace_definition(desc, new_define):
    """Return *desc* with its definition section replaced by *new_define*.

    The definition section is the text between the first ``定义：`` marker and
    the next ``关键要素：`` marker.  Only that section is rewritten; whitespace
    surrounding the old definition inside the section is preserved.

    The replacement is spliced in by match offsets rather than with
    ``str.replace`` because ``str.replace`` would also rewrite any other
    occurrence of the old definition text in *desc*, and, when the section
    holds only whitespace, would be called with an empty search string and
    insert *new_define* between every character.

    Args:
        desc: Description text containing both markers.
        new_define: Replacement definition; it is stripped before insertion.

    Returns:
        The description with the new definition in place of the old one.

    Raises:
        ValueError: If the markers are not found in *desc*.
    """
    match = _DEFINE_PATTERN.search(desc)
    if match is None:
        raise ValueError("markers '定义：' ... '关键要素：' not found in description")

    section = match.group(1)
    start = match.start(1)
    old_define = section.strip()
    if old_define:
        # Skip the leading whitespace so only the definition text is replaced.
        start += len(section) - len(section.lstrip())
        end = start + len(old_define)
    else:
        # Whitespace-only section: insert right after the "定义：" marker and
        # keep the existing whitespace as the separator.
        end = start
    return desc[:start] + new_define.strip() + desc[end:]


def main():
    """Rewrite each phase 1.8 record into a phase 2.5 record.

    For every JSON line of the input file, the ``answer`` field is removed and
    stored under ``desc``.  When ``define_with_ref`` is non-empty, the
    definition section of the description is replaced by it first.
    """
    phase18_model = 'qwen3-max'
    file_name = f"{phase18_model}_prop_desc.jsonl"
    input_path = os.path.join(path_dir, "phase1.8_output", file_name)
    output_path = os.path.join(path_dir, "phase2.5_output", file_name)

    with open(input_path, 'r', encoding='utf-8') as fp:
        # Blank lines are not valid JSON documents; skip them instead of crashing.
        phase18_data = [json.loads(line) for line in fp if line.strip()]

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # The context manager closes the output even if a record fails mid-run.
    with open(output_path, "w", encoding='utf-8') as output_fp:
        for index, prop_desc in enumerate(tqdm(phase18_data), start=1):
            desc = prop_desc.pop('answer')
            # A referenced definition is available: use it in the description.
            if prop_desc['define_with_ref']:
                try:
                    desc = replace_definition(desc, prop_desc['define_with_ref'])
                except ValueError as err:
                    raise ValueError(f"record {index} of {input_path}: {err}") from err
            prop_desc['desc'] = desc
            output_fp.write(json.dumps(prop_desc, ensure_ascii=False) + '\n')


if __name__ == '__main__':
    main()
